package org.swu.swuse.main;

import org.swu.swuse.Pipeline.DataBasePipeline;
import org.swu.swuse.Processor.SpiderProcessor;
import org.swu.swuse.model.WebSpiderConfig;

import us.codecraft.webmagic.Spider;

/**
 * Entry point that runs a crawler from one hard-coded configuration.
 * <p>
 * Intended mainly for testing crawl rules; in production, use the entry
 * method of AutoMain instead.
 *
 * @author zhanjingbo
 */
public class Main {

	/**
	 * Assembles the fixed rule set for the forum hosted at bbs.swu.edu.cn.
	 * <p>
	 * NOTE(review): how each rule is interpreted is decided by
	 * WebSpiderConfig / SpiderProcessor, which are not visible here; the
	 * inline notes below describe only what the literal values themselves say.
	 *
	 * @return a freshly populated configuration object
	 */
	private static WebSpiderConfig forumRules() {
		WebSpiderConfig rules = new WebSpiderConfig();
		rules.setName("樟树林论坛");
		rules.setUrl("http://bbs.swu.edu.cn");
		// forum.php URLs whose tail is not one of the listed file extensions
		// (the negative lookbehind is anchored at end of input).
		rules.setTargetRegular(
				"(http://bbs\\.swu\\.edu\\.cn/forum\\.php\\?.*(?<!(doc|docx|xls|xlsx|rar|zip|jpg|png|pdf|rmvb|flv|ppt|pptx))$)");
		// forum.php URLs carrying mod=viewthread right after the '?'.
		rules.setContentRegular("(http://bbs\\.swu\\.edu\\.cn/forum\\.php\\?mod=viewthread.*)");
		// XPath expressions: text of the span with id 'thread_subject', the
		// whole body element, and text of div elements with class 'disp'.
		rules.setTitleRegular("//span[@id='thread_subject']/text()");
		rules.setTextRegular("//body");
		rules.setDateRegular("//div[@class='disp']/text()");
		// Presumably a date-format pattern for the extracted date - TODO confirm.
		rules.setSdfRegular("yyyy");
		return rules;
	}

	/**
	 * Builds the configuration, wires it into a spider with a database
	 * pipeline, and runs it with a thread setting of 1.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		WebSpiderConfig rules = forumRules();
		SpiderProcessor processor = new SpiderProcessor(rules);

		Spider.create(processor)
				.addUrl(rules.getUrl())
				.addPipeline(new DataBasePipeline())
				.thread(1)
				.run();
	}
}
